package data_deepprocessing.algorithm.evaluation;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;

import data_deepprocessing.util.TxtOperate;




/**
 * Evaluates entity-extraction output against a manually reviewed standard file.
 *
 * <p>Both files are read line by line. A pair of lines takes part in the comparison only
 * when each of them splits into exactly two tab-separated fields; the second field is the
 * tag (for example {@code B-S}, {@code E-S} or {@code O}). The counts and the metrics
 * derived from them are appended to the evaluation file and returned to the caller.
 *
 * <p>Input files are decoded by {@link FileReader}, i.e. with its default charset.
 */
public class EvaluateResult {

	/** Path of the file holding the tagger's output. */
	private final String tagFileName;
	/** Path of the manually reviewed standard file. */
	private final String standardFileName;
	/** File the evaluation summary lines are appended to. */
	private final File evaluateFile;

	/**
	 * Creates an evaluator for one tagged file / standard file pair.
	 *
	 * @param tagFileName path of the tagged test result
	 * @param standardFileName path of the manually reviewed test result
	 * @param evaluateFile file the evaluation summary is appended to
	 */
	public EvaluateResult(String tagFileName, String standardFileName, File evaluateFile) {
		this.tagFileName = tagFileName;
		this.standardFileName = standardFileName;
		this.evaluateFile = evaluateFile;
	}

	/**
	 * Computes the evaluation metrics for one entity type.
	 *
	 * <p>The returned bean carries the raw counts (correct begin tags, correct entities,
	 * extracted entities, entities in the standard file) and the derived values:
	 * {@code accuracy = correct / extract}, {@code recall = correct / total}, their
	 * harmonic mean {@code f1}, and {@code accuracyB = correctB / extract}. Every ratio
	 * is reported as {@code 0.0} when its denominator is zero. Two summary lines are
	 * appended to the evaluation file.
	 *
	 * <p>If the standard file is exhausted before the tag file, the comparison stops
	 * there and the metrics are computed from the lines compared so far.
	 *
	 * @param tagType entity type suffix; tags are matched as {@code "B-" + tagType} and
	 *        {@code "E-" + tagType}
	 * @return the counts and metrics for {@code tagType}
	 * @throws Exception if reading either input file or writing the evaluation file fails
	 */
	public ResultBean evaluateResult(String tagType) throws Exception {
		final String beginTag = "B-" + tagType;
		final String endTag = "E-" + tagType;

		// Number of begin tags that agree with the standard file.
		double correctB = 0;
		// Number of correctly extracted entities.
		double correct = 0;
		// Number of entities extracted by the tagger.
		double extract = 0;
		// Number of entities in the standard file.
		double total = 0;

		File tagFile = new File(tagFileName);
		File standardFile = new File(standardFileName);

		// try-with-resources closes both readers even when reading or parsing fails.
		try (BufferedReader tagReader = new BufferedReader(new FileReader(tagFile));
				BufferedReader standardReader = new BufferedReader(new FileReader(standardFile))) {
			String tagLine = tagReader.readLine();
			String standardLine = standardReader.readLine();
			int tagFileTotalLines = TxtOperate.getTotalLines(tagFile);
			int standardFileTotalLines = TxtOperate.getTotalLines(standardFile);

			// True while the previous compared line left us inside a matched entity.
			boolean inEntity = false;
			// Counts compared line pairs only; skipped lines do not advance it.
			int countLine = 0;

			while (tagLine != null) {
				// Skip empty / lone-tab lines of the standard file without consuming a tag line.
				while (standardLine != null
						&& (standardLine.equals("\t") || standardLine.equals(""))) {
					standardLine = standardReader.readLine();
				}
				if (standardLine == null) {
					// Standard file ran out first: nothing left to compare against.
					break;
				}

				String[] tArr = tagLine.split("\t");
				String[] sArr = standardLine.split("\t");
				if (sArr.length != 2 || tArr.length != 2) {
					// Not a "token<TAB>tag" pair on both sides: drop both lines.
					standardLine = standardReader.readLine();
					tagLine = tagReader.readLine();
					continue;
				}

				String tag = tArr[1];
				String standard = sArr[1];
				boolean matches = tag.equals(standard);
				// NOTE(review): countLine ignores skipped lines, so this only identifies the
				// real last line when no line was skipped earlier - confirm the intent.
				boolean lastLineOfBothFiles = countLine == tagFileTotalLines - 1
						&& countLine == standardFileTotalLines - 1;

				if (!inEntity) {
					if (tag.startsWith(beginTag)) {
						// Every begin tag of this type counts as an extracted entity.
						extract++;
						if (matches) {
							correctB++;
							inEntity = true;
						}
					}
				} else if (matches && tag.startsWith(endTag)) {
					// A matching end tag closes the entity only on the final line; anywhere
					// else the entity stays open until the following line decides.
					if (lastLineOfBothFiles) {
						correct++;
					}
				} else if (matches && tag.equals("O")) {
					// A matching "O" right after the entity confirms it.
					correct++;
					inEntity = false;
				} else if (matches && tag.startsWith("B")) {
					// A matching begin tag confirms the previous entity and opens the next one.
					// NOTE(review): inEntity stays true even when the new begin tag belongs
					// to a different entity type - confirm this is intended.
					if (tag.equals(beginTag)) {
						correctB++;
						extract++;
					}
					correct++;
				} else {
					// Any mismatch, or any other tag inside an entity, abandons the entity.
					inEntity = false;
				}

				// Count the entities present in the standard file.
				if (standard.equals(beginTag)) {
					total++;
				}

				tagLine = tagReader.readLine();
				standardLine = standardReader.readLine();
				countLine++;
			}
		}

		ResultBean result = new ResultBean();
		result.setCorrectB(correctB);
		result.setCorrect(correct);
		result.setExtract(extract);
		result.setTotal(total);
		TxtOperate.writeTxtFile("B标注正确的个数："+correctB+"-------正确的"+tagType+"实体"+correct+"------抽出的"+tagType+"实体"+extract+"------标准病历中的"+tagType+"实体个数"+total+"\r\n", evaluateFile, true);

		double accuracy = ratio(correct, extract);
		double recall = ratio(correct, total);
		double f1 = ratio(2 * accuracy * recall, accuracy + recall);
		// Guarded like the other ratios; an unguarded 0/0 would yield NaN here.
		double accuracyB = ratio(correctB, extract);
		TxtOperate.writeTxtFile("计算B标签的准确度是:"+accuracyB+"******计算的"+tagType+"精度:"+accuracy+"******计算的"+tagType+"Recall:"+recall+"******计算的"+tagType+"F值:"+ f1+"\r\n", evaluateFile, true);

		result.setAccuracy(accuracy);
		result.setRecall(recall);
		result.setF1(f1);
		result.setAccuracyB(accuracyB);
		return result;
	}

	/** Returns {@code numerator / denominator}, or {@code 0.0} when the denominator is zero. */
	private static double ratio(double numerator, double denominator) {
		return denominator == 0.0 ? 0.0 : numerator / denominator;
	}

	/**
	 * Runs one evaluation for entity type {@code "S"} against hard-coded experiment paths.
	 *
	 * @param args unused
	 */
	public static void main(String[] args){
		String path="D:\\Experiment\\new\\zhangExperimentDataSet20161116\\YuanYuhuExperimentDataSet_noall_yyh";
		String tagFileName=path+"/result/2.txt";
		String standardFileName=path+"/StandardDataSet/2.txt";
		File evaluateFile=TxtOperate.newTxt(path+"/evaluate", "standered");

		EvaluateResult er=new EvaluateResult(tagFileName,standardFileName,evaluateFile);
		try {
			er.evaluateResult("S");
		} catch (Exception e) {
			e.printStackTrace();
		}

	}

}


/**
 * Collects the {@link ResultBean} objects produced by individual evaluations.
 */
class TotalResultBean {
	/** Results appended through {@link #add(ResultBean)}. */
	public List<ResultBean> totalList = new ArrayList<ResultBean>();

	/**
	 * Appends one result to the end of the list.
	 *
	 * @param ResultBean the result to store
	 */
	public void add(ResultBean ResultBean) {
		this.totalList.add(ResultBean);
	}

	/**
	 * Returns the backing list itself, not a copy.
	 *
	 * @return the list of results collected so far
	 */
	public List<ResultBean> getTotalList() {
		return this.totalList;
	}
}
